__license__   = 'GPL v3'
__copyright__ = '2010, limawhiskey <limawhiskey at gmail.com>'
'''
news.bbc.co.uk/sport/
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe

class BBC(BasicNewsRecipe):
    """Calibre recipe that builds a 'BBC Sport' publication from RSS feeds.

    The class is almost entirely declarative: the attributes below are read
    by ``BasicNewsRecipe``.  Images are listed in ``remove_tags`` (hence the
    "fast version" wording in ``description``), and ``preprocess_html``
    flattens plain-text hyperlinks into their text.
    """

    # ---- Publication metadata -------------------------------------------
    title = 'BBC Sport'
    __author__ = 'limawhiskey, Darko Miletic, Starson17'
    description = 'Sports news from UK. A fast version that does not download pictures'

    # ---- Fetch settings (interpreted by BasicNewsRecipe) ----------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'

    # ---- More metadata; reused in conversion_options below --------------
    publisher = 'BBC'
    category = 'sport, news, UK, world'
    language = 'en_GB'
    publication_type = 'newsportal'

    # Stylesheet text supplied by the recipe itself (no_stylesheets is True
    # above): Georgia/serif everywhere, with only h1 set bold and large.
    extra_css = '''
        h1{font-family:Georgia,serif; font-weight:bold;font-size:large;}
        h2{font-family:Georgia,serif; font-weight:normal;font-size:small;}
        p{font-family:Georgia,serif;font-size:small;}
        b {font-family:Georgia,serif;font-size:small;font-weight:normal;}
        body{font-family:Georgia,serif;font-size:small;}
        '''

    # (pattern, replacement-function) pairs.  The single entry matches HTML
    # comments -- DOTALL lets one comment span several lines -- and
    # substitutes an empty string for each match.
    preprocess_regexps = [
        (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
    ]

    # Options for the conversion step; the first four simply forward the
    # metadata attributes defined above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    # Selectors for the parts of each page that hold the story itself.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['ds', 'mxb']}),
        dict(attrs={'class': ['story-body', 'storybody']}),
    ]

    # Selectors for elements to drop: page furniture divs (by class and by
    # id), every <img>, caption/date paragraphs and every <br>.
    # NOTE(review): 'story-feature wide ' carries a trailing space in the
    # original recipe; it is kept verbatim -- confirm it is intentional.
    remove_tags = [
        dict(
            name='div',
            attrs={
                'class': [
                    'storyextra',
                    'share-help',
                    'embedded-hyper',
                    'story-feature wide ',
                    'story-feature narrow',
                    'cap',
                    'caption',
                    'q1',
                    'sihf',
                    'mva',
                    'videoInStoryC',
                    'sharesb',
                    'mvtb',
                    'mvb',
                    'bbccom_companion_text',
                    'short_table livescores2',
                    'matchLiveText',
                    'story-feature related narrow',
                ],
            },
        ),
        dict(
            name='div',
            attrs={
                'id': [
                    'oPPM_teamList_matchStats_Wrapper',
                    'bbccom_storyprintsponsorship',
                ],
            },
        ),
        dict(name=['img']),
        dict(name='p', attrs={'class': ['caption', 'date']}),
        dict(name=['br']),
    ]

    # Attribute names to strip from the remaining markup.
    remove_attributes = ['width', 'height']

    # (section title, RSS URL) pairs.
    feeds = [
        ('Football', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml'),
        ('Boxing', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/boxing/rss.xml'),
        ('Athletics', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/athletics/rss.xml'),
    ]

    def preprocess_html(self, soup):
        """Replace plain-text hyperlinks in ``soup`` with their text.

        Every ``<a>`` element whose ``string`` attribute is not ``None`` is
        swapped for that string; anchors whose ``string`` is ``None`` are
        left in place.  The tree is edited in place and the same ``soup``
        object is returned.

        ``soup`` is presumably the BeautifulSoup tree calibre passes to this
        hook -- only ``findAll``, ``string`` and ``replaceWith`` are used.
        """
        for anchor in soup.findAll('a'):
            link_text = anchor.string
            if link_text is None:
                # No single text value to substitute; keep the anchor.
                continue
            anchor.replaceWith(link_text)
        return soup